# --- Package setup -----------------------------------------------------------
# Install any missing dependencies, then attach the ones used by this script.
# BUG FIX: dplyr (%>% / mutate at the prediction steps), ggplot2 (feature
# importance plot) and Cairo (CairoTIFF graphics devices) were used later in
# the script but never installed or loaded, causing runtime failures.
required_pkgs <- c("caret", "pROC", "MatchIt", "randomForest", "rpart",
                   "dplyr", "ggplot2", "Cairo")
for (pkg in required_pkgs) {
  # requireNamespace() tests availability without attaching; library() below
  # fails loudly if the installation did not succeed.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(caret)
library(pROC)
library(MatchIt)
library(randomForest)
library(rpart)
library(dplyr)
library(ggplot2)

# --- Data --------------------------------------------------------------------
# Working directory and input file are injected by the surrounding template.
setwd("${path}")
mydata <- read.csv("data.csv")
# Ensure the outcome ${hotspot} is a factor with levels "event_0"/"event_1".
# NOTE(review): assumes the raw column holds the strings/numbers "0" and "1";
# any other value becomes NA silently — confirm against the input data.
mydata$${hotspot} <- factor(mydata$${hotspot}, levels = c("0", "1"), labels = c("event_0", "event_1"))


# 1:3 nearest-neighbour propensity-score matching on the outcome vs. the
# templated set of covariates (${independent_and}).
m.out <- matchit(${hotspot} ~  ${independent_and}, 
                 data = mydata, 
                 method = "nearest", 
                 ratio = 3)

# Extract the matched subset (treated cases plus their matched controls).
matched_data <- match.data(m.out)

# --- Initial tree and variable importance ------------------------------------
# Fit a classification tree on the matched data, using all candidate
# covariates, purely to rank predictors by importance.
model_tree <- rpart(
  ${hotspot} ~  ${independent_and},
  data = matched_data,
  method = "class"
)

# rpart stores per-variable importance in $variable.importance; it is NULL
# when the tree makes no splits, so fail early with a clear message instead
# of crashing later on names(NULL).
var_imp <- model_tree$variable.importance
if (is.null(var_imp)) {
  stop("Initial tree made no splits; no variable importance available.",
       call. = FALSE)
}

# Convert to a data frame and sort by decreasing importance for inspection.
var_imp_df <- data.frame(Variable = names(var_imp), Importance = var_imp, row.names = NULL)
var_imp_df <- var_imp_df[order(-var_imp_df$Importance), ]

# Print the ranked importances.
print(var_imp)

# Select up to the n most important variables.
# BUG FIX: the original indexed var_imp_df$Variable[1:n], which produces NA
# variable names (and a broken formula) whenever fewer than n variables have
# an importance score; head() caps the selection at what is available.
n <- 6
selected_vars <- head(var_imp_df$Variable, n)

# Build the final model formula from the selected variables.
final_formula <- as.formula(paste("${hotspot} ~", paste(selected_vars, collapse = " + ")))

# --- Final model and in-sample evaluation ------------------------------------
# Refit the tree using only the selected variables (default complexity
# parameter cp = 0.01 made explicit).
model_tree <- rpart(final_formula,
                    data = matched_data,
                    method = "class",
                    control = rpart.control(cp = 0.01))

# Predicted probability of the positive class and the hard classification at
# the 0.5 threshold. Indexing the probability matrix by the class label
# "event_1" (rather than column position 2) is robust to level ordering.
predictions_tree <- predict(model_tree, newdata = matched_data, type = "prob")[, "event_1"]
predicted_classes_tree <- factor(ifelse(predictions_tree >= 0.5, "event_1", "event_0"), levels = c("event_0", "event_1"))

# Attach predictions to the matched data set.
mydata_with_predictions_tree <- matched_data %>%
  mutate(
    ${hotspot}_probability = predictions_tree,
    ${hotspot}_predicted_class = predicted_classes_tree,
    ${hotspot}_score = NA  # decision trees have no scorecard-style score
  )

# Count rows where the observed outcome and the predicted class agree.
matching_rows <- mydata_with_predictions_tree %>%
  filter(${hotspot} == ${hotspot}_predicted_class) %>%
  nrow()
cat("Number of rows where ${hotspot} and ${hotspot}_predicted_class match:", matching_rows, "\n")

# In-sample accuracy = matching rows / total rows.
total_rows <- nrow(mydata_with_predictions_tree)
accuracy <- matching_rows / total_rows
# BUG FIX: the original printed the accuracy with the same "...match:" label
# as the raw count above, mislabelling the value being reported.
cat("In-sample accuracy (${hotspot} vs ${hotspot}_predicted_class):", accuracy, "\n")

# Persist the modeling-set predictions.
write.csv(mydata_with_predictions_tree, file = "mydata_with_predictions_tree.csv", row.names = FALSE)
#### Decision tree diagram ####
# Install and load rpart.plot for a nicer tree rendering.
if (!requireNamespace("rpart.plot", quietly = TRUE)) install.packages("rpart.plot")
library(rpart.plot)
# BUG FIX: the original device was 800 x 800 *inches* at 150 dpi (120,000 px
# per side), which is not a usable image; 8 x 8 inches at 150 dpi gives a
# reasonable 1200 x 1200 px TIFF.
Cairo::CairoTIFF(file = "decisionTreeDiagram_tree.tiff",
                 width = 8, height = 8, units = "in", dpi = 150)
rpart.plot(model_tree,
           main = "Decision Tree for Matched Data",
           type = 4, extra = 101,
           branch = 0.5, shadow.col = "gray",
           box.palette = "GnBu",    # palette for contrast
           nn = TRUE,               # show node numbers
           fallen.leaves = TRUE,    # put leaf nodes at the bottom
           cex = 0.8)               # text size
dev.off()
#### ROC curve and AUC ####
# Build the ROC curve from observed outcomes vs. predicted probabilities
# (adjust here if the data format differs).
roc_curve <- roc(matched_data$${hotspot}, predictions_tree, plot = FALSE)
# BUG FIX: same device-size bug as the tree diagram — 800 inches replaced by
# 8 x 8 inches at 150 dpi.
Cairo::CairoTIFF(file = "roc_tree.tiff",
                 width = 8, height = 8, units = "in", dpi = 150)
plot(roc_curve,
     main = "ROC Curve for Decision Tree Model", # plot title
     col = "#1c61b6",             # curve colour
     lwd = 2,                     # line width
     print.auc = TRUE,            # annotate the AUC value
     auc.polygon = TRUE,          # shade area under the curve
     grid = c(0.1, 0.2),          # grid spacing
     grid.col = c("green", "red"),# grid colours
     max.auc.polygon = TRUE,      # shade the maximal-AUC region
     auc.polygon.col = "skyblue", # AUC polygon colour
     print.thres = TRUE)          # annotate the best threshold
dev.off()

#### Feature importance plot ####
# BUG FIX: same device-size bug — 800 inches replaced by 8 x 8 inches.
Cairo::CairoTIFF(file = "importanceFeatures_tree.tiff",
                 width = 8, height = 8, units = "in", dpi = 150)
# BUG FIX: the ggplot object must be print()ed explicitly; bare ggplot
# expressions are not auto-printed when the script is run via source(),
# which previously left the TIFF blank.
print(
  ggplot(var_imp_df, aes(x = reorder(Variable, Importance), y = Importance)) +
    geom_bar(stat = "identity", fill = "steelblue") +
    theme_minimal() +
    coord_flip() +  # flip axes so variable labels read horizontally
    labs(title = "Feature Importance from Decision Tree",
         x = "Variable",
         y = "Importance Score") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          plot.title = element_text(hjust = 0.5))
)
dev.off()
#### Model performance table ####
library(kableExtra)

# BUG FIX: conf_matrix and auc_value were referenced but never defined
# anywhere in the script, so this section always errored. Compute them from
# the in-sample predictions (L60-61) and the ROC curve built above.
conf_matrix <- caret::confusionMatrix(predicted_classes_tree,
                                      matched_data$${hotspot},
                                      positive = "event_1")
auc_value <- as.numeric(pROC::auc(roc_curve))

# Assemble the summary metrics; F1 is derived from precision and recall.
performance_metrics <- data.frame(
  Metric = c("Accuracy", "Precision", "Recall", "F1 Score", "AUC"),
  Value = c(conf_matrix$overall["Accuracy"],
            conf_matrix$byClass["Precision"],
            conf_matrix$byClass["Recall"],
            (2 * conf_matrix$byClass["Precision"] * conf_matrix$byClass["Recall"]) /
              (conf_matrix$byClass["Precision"] + conf_matrix$byClass["Recall"]),
            auc_value)
)
print(performance_metrics)
# BUG FIX: write.csv(print(x), ...) relied on print()'s invisible return
# value; write the data frame directly. row.names = FALSE matches the other
# write.csv calls in this script.
write.csv(performance_metrics, "performanceMetrics_tree.csv", row.names = FALSE)

# --- Score the external data set ---------------------------------------------
# Apply the final tree to new observations read from result.csv.
mydata1 <- read.csv("result.csv")

# Probability of the positive class (second probability column) and the
# corresponding hard class label at the 0.5 threshold.
predictions_tree <- predict(model_tree, newdata = mydata1, type = "prob")[,2]
predicted_classes_tree <- factor(ifelse(predictions_tree >= 0.5, "event_1", "event_0"), levels = c("event_0", "event_1"))

# Attach the prediction columns to a copy of the scored data.
result_with_predictions_tree <- mydata1
result_with_predictions_tree$${hotspot}_probability <- predictions_tree
result_with_predictions_tree$${hotspot}_predicted_class <- predicted_classes_tree
result_with_predictions_tree$${hotspot}_score <- NA  # decision trees have no scorecard

# Persist the scored outcome data set.
write.csv(result_with_predictions_tree, file = "result_with_predictions_tree.csv", row.names = FALSE)